
*------------------------------------------------------------------------------
* Clean performance audit data
*------------------------------------------------------------------------------

*1989
clear

import excel using "KY_Assessment_Quality_Data.xlsx", sheet("Table Data") cellrange(A2) firstrow

rename A county

rename res_study_cod res_cod_audit
rename res_studymedian res_med_audit
rename comm_studymedian comm_med_audit
rename comm_study_cod comm_cod_audit
rename farm_studymedian farm_med_audit
rename farm_study_cod farm_cod_audit

keep county *_audit

keep if res_cod_audit!=.

gen year=1989

save performance_audit_1989, replace

*2010
clear

import excel using "2010 Property Assessment Performance Audit Results.xlsx", firstrow

drop if B==.

drop H-K

rename County county

rename B res_med_audit
rename C res_cod_audit
rename D farm_med_audit
rename E farm_cod_audit
rename F comm_med_audit
rename G comm_cod_audit

*Some med ratios read in as percentages not levels, so don't need *100
foreach var of varlist *med_audit {
replace `var' = `var'*100 if `var'<90
format `var'  %6.1f
}

gen year=2010

save performance_audit_2010, replace


*2014
clear

import excel using "2014 Property Assessment Performance Audit Results.xlsx"

drop H

drop if B=="Res"
drop if B=="Median"

rename A county

replace county="McLean" if county=="Mclean"
replace county="Marion" if county=="Marlon"


rename B res_med_audit
rename C res_cod_audit
rename D farm_med_audit
rename E farm_cod_audit
rename F comm_med_audit
rename G comm_cod_audit

foreach var of varlist *_audit {
destring `var', replace
}


foreach var of varlist *med_audit {
replace `var' = `var'*100
format `var'  %6.1f
}

drop if res_med_audit==.

gen year=2014

save performance_audit_2014, replace

*2016
clear

import excel using "2016 Property Assessment Performance Audit Results.xlsx"


rename A county

rename B res_med_audit
rename C res_cod_audit
rename D farm_med_audit
rename E farm_cod_audit
rename F comm_med_audit
rename G comm_cod_audit


foreach var of varlist *med_audit {
replace `var' = `var'*100
format `var'  %6.1f
}

gen year=2016

save performance_audit_2016, replace



*2018

clear

import excel using "2018 Property Assessment Performance Audit Results - June 2021.xlsx", firstrow


rename County county

rename ResMedian res_med_audit
rename ResCOD res_cod_audit
rename FarmMedian farm_med_audit
rename FarmCOD farm_cod_audit
rename CommMedian comm_med_audit
rename CommCOD comm_cod_audit



gen year=2018


append using performance_audit_1989 performance_audit_2010 performance_audit_2014 ///
performance_audit_2016


* Create flag for county in 1989 audit
bysort county: egen no_years=count(year)

gen audit_1989=(no_years==5)

**Fix audit data entry issues

*2018 - confirmed from raw data that values need to be *100
*Spreadsheet is storing them as percentages, which get converted to levels when
*read in by Stata
gen res_med_audit1=res_med_audit
replace res_med_audit1=res_med_audit*100 if year==2018


*2014 - small smudge on original pdf caused OCR to read a leading 1
replace res_med_audit1=95.4 if year==2014 & county=="Boyle"

*2016 - Assuming the two values of 1.01 in 2016 are actually 101 (also need *100)
*They are entered that way in the original pdf, but the COD is clearly in the
*correct units, which would have used the median as a base. And the average
*deviation would go less than zero of the lower end (1.01-2.93 for Mercer)
replace res_med_audit1=res_med_audit1*100 if res_med_audit1<50

drop res_med_audit
rename res_med_audit1 res_med_audit

save performance_audit_clean, replace
